#install.packages(c("forecast", "expsmooth", "seasonal"))
library(TTR)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(expsmooth)
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.4.2 ✔ fma 2.5
##
library(seasonal)
library(MASS)
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
library(stats)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(forecast)
library(ggplot2)
library(tseries)
library(imputeTS)
##
## Attaching package: 'imputeTS'
## The following object is masked from 'package:tseries':
##
## na.remove
library(vars)
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:imputeTS':
##
## na.locf
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library(timetk)
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
library(lmtest)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(xts)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
path <- "/Users/aashishsingh/Desktop/Time Series - Final Project/"
pm2_5_data <- read.csv(paste0(path, "la_pm2_5_pollutant_data.csv"),
na.strings=c("", "NA"))
ozone_data <- read.csv(paste0(path, "la_ozone_pollutant_data.csv"),
na.strings=c("", "NA"))
# Convert data into ts objects
pm2_5_ts <- ts(pm2_5_data$PM2.5.AQI.Value, start = c(1999,1,1), frequency = 365.25)
ozone_ts <- ts(ozone_data$Ozone.AQI.Value, start = c(1999,1,1), frequency = 365.25)
plot(pm2_5_ts, main="Los Angeles PM 2.5 AQI")
plot(ozone_ts, main="Los Angeles Ozone AQI")
# Stationarity checks on the daily series.
# Note the two tests have opposite null hypotheses (KPSS H0 = level
# stationary; ADF H0 = unit root), so they can legitimately disagree.
kpss.test(pm2_5_ts)
## Warning in kpss.test(pm2_5_ts): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: pm2_5_ts
## KPSS Level = 6.951, Truncation lag parameter = 12, p-value = 0.01
# The reported p-value is 0.01, which is smaller than 0.05, and would suggest
# that we reject the null hypothesis of level stationarity and conclude that
# there is evidence that the data is non-stationary
adf.test(pm2_5_ts)
## Warning in adf.test(pm2_5_ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: pm2_5_ts
## Dickey-Fuller = -14.554, Lag order = 20, p-value = 0.01
## alternative hypothesis: stationary
# Similarly the Augmented Dickey-Fuller test results in p-value of 0.01 (<0.05)
# where we do reject the null hypothesis that the time series is non-stationary
# and thus data is stationary.
# These are opposite results so we use ACF PACF plots to see if the series is stationary
acf(pm2_5_ts, main = "ACF of Original Time Series")
pacf(pm2_5_ts, main = "PACF of Original Time Series")
#### Step 3: Prepare data for Weekly & Monthly extraction and EDA
# Create data with Dates
pm2_5_df <- as.data.frame(pm2_5_ts)
pm2_5_df$Date <- mdy(pm2_5_data$Date)
colnames(pm2_5_df) <- c("PM2.5.AQI.Value", "Date")
# Convert to xts format for weekly & monthly aggregation.
# Build the xts directly from the value column: as.xts() on the full data
# frame coerces everything through a matrix (turning the Date column
# numeric) and then needs a fragile positional drop of that column.
pm2_5_xts <- xts(pm2_5_df[, "PM2.5.AQI.Value", drop = FALSE],
                 order.by = pm2_5_df$Date)
# Let's see how seasonality looks over time and is the variance changing
pm2_5_df$Month <- month(pm2_5_df$Date)
pm2_5_df$Year <- year(pm2_5_df$Date)
# Average AQI per Year x Month cell.
# Use bare column names inside dplyr verbs (not pm2_5_df$...), so the
# grouping columns come out named Year/Month directly; .groups = "drop"
# returns an ungrouped result and silences the summarise() message.
avg_aqi_by_month_year <- pm2_5_df %>%
  group_by(Year, Month) %>%
  summarise(
    avg_value = mean(PM2.5.AQI.Value),
    .groups = "drop"
  )
# Kept for clarity (names already match): Year, Month, avg_value.
colnames(avg_aqi_by_month_year) <- c("Year", "Month", "avg_value")
# Reshape to one row per Month and one column per Year so the within-year
# seasonal pattern can be compared across years. pivot_wider() replaces the
# previous 25 hard-coded SQL "CASE WHEN Year = ..." clauses and adapts
# automatically to whatever range of years is present in the data;
# names_sort = TRUE orders the year columns ascending (e.g. "1999".."2023").
reshape_avg_aqi_by_month_year <- avg_aqi_by_month_year %>%
  pivot_wider(names_from = Year, values_from = avg_value, names_sort = TRUE) %>%
  arrange(Month) %>%
  as.data.frame()
# One box per year column (drop the leading Month column).
boxplot(reshape_avg_aqi_by_month_year[-1])
# We can see that over the years there is downward trend and variance is decreasing
# except 2020 when we see a high peak likely caused by wildfires
# Link: https://en.wikipedia.org/wiki/Lake_Fire_(2020)
# Aggregate the daily xts series to weekly means.
pm2_5_weekly <- apply.weekly(pm2_5_xts, mean)
# Restrict to the observed range and rebuild as a ts object.
# frequency = 52.18 approximates weeks per year (365.25 / 7); note that a
# non-integer frequency makes some forecast functions warn (see snaive below).
pm2_5_weekly_ts <- ts(pm2_5_weekly["19990103/20230811"],
start = c(1999,1),
frequency = 52.18)
plot(pm2_5_weekly_ts)
# Strong seasonality, Strong cyclic, light downward trend, variance is reducing
kpss.test(pm2_5_weekly_ts)
## Warning in kpss.test(pm2_5_weekly_ts): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: pm2_5_weekly_ts
## KPSS Level = 3.4904, Truncation lag parameter = 7, p-value = 0.01
adf.test(pm2_5_weekly_ts)
## Warning in adf.test(pm2_5_weekly_ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: pm2_5_weekly_ts
## Dickey-Fuller = -8.4636, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
acf(pm2_5_weekly_ts, main = "ACF of Weekly Time Series")
pacf(pm2_5_weekly_ts, main = "PACF of Weekly Time Series")
# Split the data into a training and test dataset
# (train: 1999 through 2020; test: 2021 onward, about 2.6 years)
train_weekly <- window(pm2_5_weekly_ts, start = c(1999,1), end=c(2020,52))
test_weekly <- window(pm2_5_weekly_ts, start = c(2021,1))
# Looking at spectral analysis
periodogram(train_weekly, log = "no", plot=TRUE,
ylab = "Periodogram",
xlab = "Frequency",
lwd=2, xlim = c(0, 0.06))
# There are three dominant periodicities:
# 1) A typical yearly (52 weeks) seasonal trend
# 2) A trend that is repeating every 9.6 years (500 weeks)
# 3) A typical half yearly (26 weeks) seasonal trend
# Overall, there is no mixed effect that normal seasonality model cannot capture.
# Decompose the series
plot(decompose(train_weekly, type="multiplicative"))
# Important Inferences
# 1) The PM2.5 AQI has been decreasing overall though there are rise every now and then.
# However the trend is going down with time.
# 2) Winter months have a strong seasonal effect with Nov and Dec being the peak months
# usually which likely could be due to cold temperatures causing pollutant to
# not escape the lower atmosphere.
# Link: https://www.accuweather.com/en/health-wellness/why-air-pollution-is-worse-in-winter/689434#:~:text=Cold%20air%20is%20denser%20and%20moves%20slower%20than%20warm%20air,rate%20than%20during%20the%20summer.
# 3) We can see a seasonal cycle of 12 months where the mean value of each month starts
# with a increasing trend in the beginning of the year and drops down towards
# the end of the year. We can see a seasonal effect with a cycle of 12 months.
# Understand the seasonality and remove it to see the trend
# Seasonal differencing at lag 52 (one year of weeks) removes the annual cycle.
train_weekly_diff <- diff(train_weekly, lag = 52)
autoplot(train_weekly_diff, ylab="Train Seasonal Differencing - Weekly Data")
# Let's look if the series is stationary?
kpss.test(train_weekly_diff)
## Warning in kpss.test(train_weekly_diff): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: train_weekly_diff
## KPSS Level = 0.17702, Truncation lag parameter = 7, p-value = 0.1
# The reported p-value is 0.1, which is > 0.05, and would suggest that we fail
# to reject the null hypothesis of level stationarity (conclusion: stationary)
adf.test(train_weekly_diff)
## Warning in adf.test(train_weekly_diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_weekly_diff
## Dickey-Fuller = -8.5534, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
# The reported p-value of 0.01 (<0.05) so we do reject the null hypothesis that
# the time series is non-stationary (conclusion: stationary)
acf(train_weekly_diff, main = "ACF of Seasonal Differencing Time Series - Weekly Data")
pacf(train_weekly_diff, main = "PACF of Seasonal Differencing Time Series - Weekly Data")
# Forecast the seasonal naïve for (2021-01 to 2023-07)
# h = 110 weekly steps spans the test window; this is the baseline model.
forecast_snaive_weekly <- snaive(train_weekly, h=110)
## Warning in lag.default(y, -lag): 'k' is not an integer
# (the warning stems from the non-integer frequency 52.18)
# Plot the forecasts for snaive model
plot(forecast_snaive_weekly, main = "PM 2.5 AQI Forecast - SNaive (Weekly)",
xlab = "Week", ylab = "PM 2.5 AQI")
lines(test_weekly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE) -- reported as a fraction (0.275 = 27.5%)
mape_snaive_weekly <- mean(abs((test_weekly - forecast_snaive_weekly$mean)/test_weekly))
mape_snaive_weekly
## [1] 0.2749545
# Mean Absolute Error (MAE)
mae_snaive_weekly <- mean(abs((test_weekly - forecast_snaive_weekly$mean)))
mae_snaive_weekly
## [1] 16.28571
# Mean Squared Error (MSE)
mse_snaive_weekly <- mean((test_weekly - forecast_snaive_weekly$mean)^2)
mse_snaive_weekly
## [1] 599.6278
# Evaluate the residuals
# Ljung-Box p < 2.2e-16: snaive residuals are strongly autocorrelated,
# i.e. the seasonal naive baseline leaves structure unexplained.
checkresiduals(forecast_snaive_weekly)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 509.98, df = 104.36, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 104.36
# Build the exponential smoothing model.
# lambda="auto" applies a Box-Cox transform; additive.only restricts the
# search to additive error/trend/season forms. NOTE: ets() cannot handle
# frequency > 24, so seasonality is ignored here and the selected model is
# the non-seasonal ETS(A,N,N) (simple exponential smoothing on the
# transformed scale).
model_ets_weekly <- ets(train_weekly, lambda="auto", additive.only=TRUE)
## Warning in ets(train_weekly, lambda = "auto", additive.only = TRUE): I can't
## handle data with frequency greater than 24. Seasonality will be ignored. Try
## stlf() if you need seasonal forecasts.
summary(model_ets_weekly)
## ETS(A,N,N)
##
## Call:
## ets(y = train_weekly, additive.only = TRUE, lambda = "auto")
##
## Box-Cox transformation: lambda= -0.5881
##
## Smoothing parameters:
## alpha = 0.2953
##
## Initial states:
## l = 1.5899
##
## sigma: 0.0179
##
## AIC AICc BIC
## -1146.438 -1146.417 -1131.304
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.821839 16.44366 11.86525 -1.428602 16.4125 0.7341754 0.1189196
forecast_ets_weekly <- forecast(model_ets_weekly, h=110)
# Plot the forecasts for ETS model
# NOTE(review): xlab says "Month" but this is the weekly series.
plot(forecast_ets_weekly, main = "PM 2.5 AQI Forecast - ETS",
xlab = "Month", ylab = "PM 2.5 AQI")
lines(test_weekly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE)
mape_ets_weekly <- mean(abs((test_weekly - forecast_ets_weekly$mean)/test_weekly))
mape_ets_weekly
## [1] 0.3342543
# Mean Absolute Error (MAE)
mae_ets_weekly <- mean(abs((test_weekly - forecast_ets_weekly$mean)))
mae_ets_weekly
## [1] 19.10983
# Mean Squared Error (MSE)
mse_ets_weekly <- mean((test_weekly - forecast_ets_weekly$mean)^2)
mse_ets_weekly
## [1] 434.2529
# Evaluate the residuals
# Small p-values: the non-seasonal ETS leaves autocorrelation behind,
# as expected since the 52-week seasonality was ignored.
checkresiduals(forecast_ets_weekly)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 202.74, df = 104.36, p-value = 2.727e-08
##
## Model df: 0. Total lags used: 104.36
Box.test(residuals(forecast_ets_weekly), lag = 52, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(forecast_ets_weekly)
## X-squared = 103.65, df = 52, p-value = 2.742e-05
# Fit a neural-network autoregression (NNAR): size=5 hidden nodes, weight
# decay 0.5, averaged over 1000 networks for stability; lambda="auto"
# Box-Cox-transforms the series first.
model_nn_weekly <- nnetar(train_weekly,
size=5,
decay=0.5,
repeats=1000,
lambda="auto")
summary(model_nn_weekly)
## Length Class Mode
## x 1147 ts numeric
## m 1 -none- numeric
## p 1 -none- numeric
## P 1 -none- numeric
## scalex 2 -none- list
## size 1 -none- numeric
## lambda 1 -none- numeric
## subset 1147 -none- numeric
## model 1000 nnetarmodels list
## nnetargs 1 -none- list
## fitted 1147 ts numeric
## residuals 1147 ts numeric
## lags 19 -none- numeric
## series 1 -none- character
## method 1 -none- character
## call 6 -none- call
# PI=FALSE skips simulation-based prediction intervals (expensive for NNAR).
forecast_nn_weekly <- forecast(model_nn_weekly, PI=FALSE, h=110)
# Plot the forecasts for NN model
plot(forecast_nn_weekly, main = "PM 2.5 AQI Forecast - Neural Network",
xlab = "Weekly", ylab = "PM 2.5 AQI")
lines(test_weekly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE)
mape_nn_weekly <- mean(abs((test_weekly - forecast_nn_weekly$mean)/test_weekly))
mape_nn_weekly
## [1] 0.1910434
# Mean Absolute Error (MAE)
mae_nn_weekly <- mean(abs((test_weekly - forecast_nn_weekly$mean)))
mae_nn_weekly
## [1] 11.29234
# Mean Squared Error (MSE)
mse_nn_weekly <- mean((test_weekly - forecast_nn_weekly$mean)^2)
mse_nn_weekly
## [1] 195.8605
# Evaluate the residuals
# Ljung-Box p = 0.401: residuals look like white noise -- the NNAR model
# has the best weekly test metrics (lowest MAPE/MAE/MSE) so far.
checkresiduals(forecast_nn_weekly)
##
## Ljung-Box test
##
## data: Residuals from NNAR(18,1,5)[52]
## Q* = 107.35, df = 104.36, p-value = 0.401
##
## Model df: 0. Total lags used: 104.36
# Create monthly data: aggregate the daily xts series to monthly means.
pm2_5_monthly <- apply.monthly(pm2_5_xts, mean)
# Monthly data has a clean integer frequency of 12.
pm2_5_monthly_ts <- ts(pm2_5_monthly["19990131/20230811"], start = c(1999,1), frequency = 12)
plot(pm2_5_monthly_ts)
kpss.test(pm2_5_monthly_ts)
## Warning in kpss.test(pm2_5_monthly_ts): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: pm2_5_monthly_ts
## KPSS Level = 1.9774, Truncation lag parameter = 5, p-value = 0.01
adf.test(pm2_5_monthly_ts)
## Warning in adf.test(pm2_5_monthly_ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: pm2_5_monthly_ts
## Dickey-Fuller = -7.4466, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
acf(pm2_5_monthly_ts, main = "ACF of Monthly Time Series")
pacf(pm2_5_monthly_ts, main = "PACF of Monthly Time Series")
# Split the data into a training and test dataset
# (train: 1999 through 2020; test: 2021 onward)
train_monthly <- window(pm2_5_monthly_ts, start = c(1999,1), end=c(2020,12))
test_monthly <- window(pm2_5_monthly_ts, start = c(2021,1))
# Looking at spectral analysis
periodogram(train_monthly, log = "no", plot=TRUE,
ylab = "Periodogram",
xlab = "Frequency",
lwd=2, xlim = c(0, 0.2))
# There are three dominant periodicities:
# 1) A typical yearly (12 months) seasonal trend
# 2) A trend that is repeating every 8-9 years (100 months)
# 3) A typical half yearly (6 months) seasonal trend
# Overall, there is no mixed effect that normal seasonality model cannot capture.
# Decompose the series
plot(decompose(train_monthly, type="multiplicative"))
# Important Inferences
# 1) The PM2.5 AQI has been decreasing overall though there are rise every now and then.
# However the trend is going down with time.
# 2) Winter months have a strong seasonal effect with Nov and Dec being the peak months
# usually which likely could be due to cold temperatures causing pollutant to
# not escape the lower atmosphere.
# Link: https://www.accuweather.com/en/health-wellness/why-air-pollution-is-worse-in-winter/689434#:~:text=Cold%20air%20is%20denser%20and%20moves%20slower%20than%20warm%20air,rate%20than%20during%20the%20summer.
# 3) We can see a seasonal cycle of 12 months where the mean value of each month starts
# with a increasing trend in the beginning of the year and drops down towards
# the end of the year. We can see a seasonal effect with a cycle of 12 months.
# Understand the seasonality and remove it to see the trend
# Seasonal differencing at lag 12 (one year of months) removes the annual cycle.
train_monthly_diff <- diff(train_monthly, lag = 12)
autoplot(train_monthly_diff, ylab="Train Seasonal Differencing (Monthly)")
# Let's look if the series is stationary?
kpss.test(train_monthly_diff)
## Warning in kpss.test(train_monthly_diff): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: train_monthly_diff
## KPSS Level = 0.14573, Truncation lag parameter = 5, p-value = 0.1
# The reported p-value is 0.1, which is > 0.05, and would suggest that we fail
# to reject the null hypothesis of level stationarity (conclusion: stationary)
adf.test(train_monthly_diff)
## Warning in adf.test(train_monthly_diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_monthly_diff
## Dickey-Fuller = -5.1056, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# The reported p-value of 0.01 (<0.05) so we do reject the null hypothesis that
# the time series is non-stationary (conclusion: stationary)
acf(train_monthly_diff, main = "ACF of Seasonal Differencing Time Series")
pacf(train_monthly_diff, main = "PACF of Seasonal Differencing Time Series")
# Forecast the seasonal naïve for (2021-01 to 2023-07)
# h = 31 monthly steps spans the test window; this is the monthly baseline.
forecast_snaive_monthly <- snaive(train_monthly, h=31)
# Plot the forecasts for snaive model
plot(forecast_snaive_monthly, main = "PM 2.5 AQI Forecast - SNaive",
xlab = "Month", ylab = "PM 2.5 AQI")
lines(test_monthly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE) -- reported as a fraction (0.208 = 20.8%)
mape_snaive_monthly <- mean(abs((test_monthly - forecast_snaive_monthly$mean)/test_monthly))
mape_snaive_monthly
## [1] 0.20778
# Mean Absolute Error (MAE)
mae_snaive_monthly <- mean(abs((test_monthly - forecast_snaive_monthly$mean)))
mae_snaive_monthly
## [1] 12.00976
# Mean Squared Error (MSE)
mse_snaive_monthly <- mean((test_monthly - forecast_snaive_monthly$mean)^2)
mse_snaive_monthly
## [1] 282.8901
# Evaluate the residuals
# Small p-value: the seasonal naive residuals remain autocorrelated.
checkresiduals(forecast_snaive_monthly)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 82.387, df = 24, p-value = 2.523e-08
##
## Model df: 0. Total lags used: 24
# Build the exponential smoothing model.
# With frequency 12, ets() can model seasonality; it selects ETS(A,N,A)
# (additive error, no trend, additive seasonality) on the Box-Cox scale.
model_ets_monthly <- ets(train_monthly, lambda="auto")
summary(model_ets_monthly)
## ETS(A,N,A)
##
## Call:
## ets(y = train_monthly, lambda = "auto")
##
## Box-Cox transformation: lambda= -0.0996
##
## Smoothing parameters:
## alpha = 0.238
## gamma = 1e-04
##
## Initial states:
## l = 3.5468
## s = 0.0787 0.0513 0.0374 0.0224 0.0067 0.0386
## -0.0077 -0.0477 -0.1042 -0.0735 -0.0636 0.0616
##
## sigma: 0.0924
##
## AIC AICc BIC
## 230.3216 232.2571 283.9609
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.4199959 10.02138 7.654152 -1.045442 10.71358 0.7823768 0.1335483
forecast_ets_monthly <- forecast(model_ets_monthly, h=31)
# Plot the forecasts for ETS model
plot(forecast_ets_monthly, main = "PM 2.5 AQI Forecast - ETS",
xlab = "Month", ylab = "PM 2.5 AQI")
lines(test_monthly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE)
mape_ets_monthly <- mean(abs((test_monthly - forecast_ets_monthly$mean)/test_monthly))
mape_ets_monthly
## [1] 0.1962783
# Mean Absolute Error (MAE)
mae_ets_monthly <- mean(abs((test_monthly - forecast_ets_monthly$mean)))
mae_ets_monthly
## [1] 11.06968
# Mean Squared Error (MSE)
mse_ets_monthly <- mean((test_monthly - forecast_ets_monthly$mean)^2)
mse_ets_monthly
## [1] 163.3788
# Evaluate the residuals
# Small p-values: even the seasonal ETS leaves some autocorrelation.
checkresiduals(forecast_ets_monthly)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 63.91, df = 24, p-value = 1.757e-05
##
## Model df: 0. Total lags used: 24
Box.test(residuals(forecast_ets_monthly), lag = 12, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(forecast_ets_monthly)
## X-squared = 33.762, df = 12, p-value = 0.0007353
# Fit a seasonal neural-network autoregression: P=12 seasonal lags, size=5
# hidden nodes, weight decay 0.5, averaged over 1000 networks;
# lambda="auto" Box-Cox-transforms the series first.
model_nn_monthly <- nnetar(train_monthly,
P=12,
size=5,
decay=0.5,
repeats=1000,
lambda="auto")
summary(model_nn_monthly)
## Length Class Mode
## x 264 ts numeric
## m 1 -none- numeric
## p 1 -none- numeric
## P 1 -none- numeric
## scalex 2 -none- list
## size 1 -none- numeric
## lambda 1 -none- numeric
## subset 264 -none- numeric
## model 1000 nnetarmodels list
## nnetargs 1 -none- list
## fitted 264 ts numeric
## residuals 264 ts numeric
## lags 16 -none- numeric
## series 1 -none- character
## method 1 -none- character
## call 7 -none- call
# PI=FALSE skips simulation-based prediction intervals (expensive for NNAR).
forecast_nn_monthly <- forecast(model_nn_monthly, PI=FALSE, h=31)
# Plot the forecasts for NN model
plot(forecast_nn_monthly, main = "PM 2.5 AQI Forecast - Neural Network",
xlab = "Month", ylab = "PM 2.5 AQI")
lines(test_monthly)
# Compare the forecast with the actual test data by calculating MAPE and MSE
# Mean Absolute Percentage Error (MAPE)
mape_nn_monthly <- mean(abs((test_monthly - forecast_nn_monthly$mean)/test_monthly))
mape_nn_monthly
## [1] 0.1453023
# Mean Absolute Error (MAE)
mae_nn_monthly <- mean(abs((test_monthly - forecast_nn_monthly$mean)))
mae_nn_monthly
## [1] 8.341272
# Mean Squared Error (MSE)
mse_nn_monthly <- mean((test_monthly - forecast_nn_monthly$mean)^2)
mse_nn_monthly
## [1] 109.5097
# Evaluate the residuals
# Ljung-Box p = 0.9311: residuals are indistinguishable from white noise;
# NNAR also has the best monthly test metrics (lowest MAPE/MAE/MSE).
checkresiduals(forecast_nn_monthly)
##
## Ljung-Box test
##
## data: Residuals from NNAR(4,12,5)[12]
## Q* = 14.63, df = 24, p-value = 0.9311
##
## Model df: 0. Total lags used: 24